
/*
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * SPDX-License-Identifier: GPL-3.0+
 * License-Filename: LICENSE
 */

#include "config.h"

#include <stdio.h>
#include <string.h>

#include "splay-tree.h"
#include "main.h"
#include "options.h"
#include "uniqstring.h"
#include "parse.h"

/* Manual dot lexer with tweaks to parse around GCC data bugs
 * and other known bugs in data.
 * there are patches for gcc-6.x data but too late to get it in
 * main so we have to wait for gcc-7.x before it is fixed.
 * wrgetc() and wrungetc() can be used as wrapper to read
 * from network stream, compressed or encrypted data etc.
 * The dot language is not easy because it needs additional
 * parsing in parsed data and it has weird unusual issues.
 * The keywords cannot be simply lexed because they can be
 * used as node name depending on how it is used in graph.
 * the wrgetc() and wrungetc() can also changed to read from
 * buffers, network data etc.
 * dotlex() returns token or 0x00 at end-of-file or error.
 * dotlex() returns only token codes and single char as token DOT_CHAR
 * it is intentional design that dotlex() returns only tokens and not chars
 * stand-alone newline '\n' is skipped as whitespace in main dotlex().
 * a 0x00 char is treated as EOF which makes it possible to
 * add additional data after that point if needed.
 * this is different from dot which allows 0x00 in stream.
 * a @ char is seen as end-of-file in dot language
 * The first token can be a utf-8 begin code that is dot specific.
 * 0xef 0xbb 0xbf skip utf8 code begin-of-message token DOT_UTF8BOM
 * single chars:
 * "{" token DOT_BO brace-open
 * "}" token DOT_BC brace-close
 * "[" token DOT_BRACKETO bracket-open
 * "]" token DOT_BRACKETC bracket-close
 * "," token DOT_COMMA optional in grammar
 * ";" token DOT_SEMIC optional in grammar
 * ":" token DOT_COLON colon
 * "=" token DOT_IS equal
 * a DOT_STRING is between "" and '\"' is changed into " and returned
 * with the backslash control chars to handle by parser. At lexing
 * level '\' \n is ignored and filtered and linecount updated.
 * A dot number can have multiple '.' and may end with ascii chars
 * resulting in unusable number strings but it's dot definition.
 * A dot html label is returned to parse in parse with the start
 * and end '<<' and '>>'.
 */

/*
 * The length in characters of strings, identifiers and numbers is
 * intentionally limited in this lexer to DOT_MAXSTRING chars.
 * Normally no problem and only with GCC compiler data this limit
 * can be reached but with such long label strings the drawing
 * has become unusable already.
 * When the string in yytext is too long then the string is silent
 * truncated to have DOT_MAXSTRING chars.
 * Set this value higher or lower when needed because it is
 * arbitrary chosen and based on experience with GCC dot data.
 * when the string is a shape=record string and it is truncated
 * the matching '{' '}' may fail and because GCC generates too big
 * node shapes this value must be high as possible.
 */
#define DOT_MAXSTRING (265 * 1024)	/* parenthesized to stay one value inside larger expressions */

/* print diag text about what is lexed when yy_flex_debug is set and
 * then return token code x from the calling function.
 * s is the token name and t the token text to print.
 * the macro has no trailing ';' so that "TOKEN_RETURN(...);" is one
 * single statement which is also safe in a if/else without braces.
 */
#define TOKEN_RETURN(x,s,t) \
	do { \
		if (yy_flex_debug) { \
			printf("%s(): line %d token %d %s `%s'\n",__FUNCTION__,yylineno,yyntokens,(s),(t)); \
			fflush (stdout); \
		} \
		return (x); \
	} while (0)

/* wrapped fgetc on yyin, counts lines and returns EOF at error or when there is no stream */
static int wrgetc(void);

/* wrapped ungetc on yyin, returns EOF at error or c at success */
static int wrungetc(int c);

/* skip whitespace and comments and return first non-ws char, or 0x00 or EOF at end of data */
static int dotlex_skipws(void);

/* file stream to read from, set in dotlexinit() */
static FILE *yyin = (FILE *) 0;

/* debug output if non-zero, a value above 1 also logs every char read or pushed back */
static int yy_flex_debug = 0;

/* current line number, updated in wrgetc() and wrungetc() */
static int yylineno = 1;

/* number of tokens lexed since dotlexinit() */
static int yyntokens = 0;

/* current char as last read in dotlex() */
static int yyc = 0;

/* buffer for a single char and its terminating 0, used for DOT_CHAR tokens */
static char charbuf[2];

/* buffer to collect the text of a html, string, number or id token */
static char strbuf[DOT_MAXSTRING];

/* name of file printed in diagnostics, "unknown" until set in dotlexinit() */
static char *fname = "unknown";

/* Reset the lexer after a parse.
 * When debug is on a summary is printed with the number of tokens,
 * the line reached and the last char seen.
 * Debug output is switched off and the file name is set back to "unknown".
 * The line number is kept and the stream is not closed here.
 */
void dotlexreset(void)
{
	if (yy_flex_debug) {
		/* print the char as a byte value, a plain (char) can be negative and prints as 0xffffffxx */
		printf("%s(): lexed %d tokens util line %d last char is 0x%02x\n", __FUNCTION__, yyntokens, yylineno,
		       (unsigned int)(unsigned char)yyc);
		fflush(stdout);
	}
	/* reset needed vars */
	yy_flex_debug = 0;
	/* do not reset yylineno */
	fname = "unknown";
	return;
}

/* Prepare the lexer to read a new input.
 * f is the stream to read, when it is 0 stdin is used with name "yyin".
 * filename is the name printed in diagnostics, when it is 0 the
 * name "unknown" is used because the name is printed with "%s".
 * dotdebug is the debug level: 0 is off, non-zero prints every token
 * and a value above 1 also prints every char read or pushed back.
 * Line number, current char and token count are set to start values.
 */
void dotlexinit(FILE * f, char *filename, int dotdebug)
{
	/* set stream */
	if (f) {
		yyin = f;
		if (filename) {
			fname = filename;
		} else {
			fname = "unknown";
		}
	} else {
		/* shouldnothappen */
		yyin = stdin;
		fname = "yyin";
	}
	/* optional debug output on stdout */
	/* make this 2 to get per char debug log */
	yy_flex_debug = dotdebug;
	/* current line number */
	yylineno = 1;
	/* current char */
	yyc = 0;
	/* number of tokens lexed */
	yyntokens = 0;
	/* */
	if (option_parsedebug) {
		printf("%s(): init debug=%d\n", __FUNCTION__, dotdebug);
		fflush(stdout);
	}
	return;
}

/* report the line number the lexer has reached in the input */
int dotlex_lineno(void)
{
	int line = yylineno;

	return line;
}

/* report the value of DOT_MAXSTRING, the size of the token text buffer */
int dotlex_max_strlen(void)
{
	const int limit = DOT_MAXSTRING;

	return limit;
}

/* return token or 0 at error or EOF */
int dotlex(void)
{
	int nest = 1;		/* '<' nesting level */
	int i = 0;

	for (;;) {
		/* skip whitespace and comments */
		yyc = dotlex_skipws();

		/* newline */
		if (yyc == '\n') {
			/* skipped as if  ws char */
			continue;
			/* as token:
			 * yyntokens = (yyntokens + 1);
			 * return ('\n');
			 */
		} else {
			break;
		}
	}

	/* number of tokens lexed */
	if (yyntokens == 0) {
		/* check for utf8 begin code "\xef\xbb\xbf" skip utf8 code begin-of-message */
		if (yyc == 0xef) {
			yyc = wrgetc();
			if (yyc == 0xbb) {
				yyc = wrgetc();
				if (yyc == 0xbf) {
					yyntokens = (yyntokens + 1);
					TOKEN_RETURN(DOT_UTF8BOM, "DOT_UTF8BOM", "utf8bom");
				} else {
					/* lex back to 0xbb and return it as char */
					/* generates parse error */
					yyc = wrungetc(yyc);
					yyc = 0xbb;
					charbuf[0] = (char)0xbb;
					charbuf[1] = 0x00;
					lastchar = uniqstring(charbuf);
					yyntokens = (yyntokens + 1);
					TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
				}
			} else {
				/* lex back to 0xef and return it as char */
				/* generates parse error */
				yyc = wrungetc(yyc);
				yyc = 0xef;
				charbuf[0] = (char)0xef;
				charbuf[1] = 0x00;
				lastchar = uniqstring(charbuf);
				yyntokens = (yyntokens + 1);
				TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
			}
		} else if (yyc == '}') {
			/* Bug 70185 */
			fprintf(stderr,
				"%s(): at line %d file %s token %d possibly known gcc data error only single close brace '}' now upgrade to gcc-7 or apply patch in gcc/passes.c opt_pass::opt_pass (const pass_data &data, context *ctxt)\n",
				__FUNCTION__, yylineno, fname, yyntokens);
			fflush(stderr);
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_BC, "DOT_BC", "}");
		} else {
			/* code handled below */
		}
	}

	/* end of file char */
	if (yyc == 0x00) {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(0, "0", "end-of-file");
	}

	/* end of file char */
	if (yyc == EOF) {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(0, "EOF", "end-of-file");
	}

	/* special dot char used to indicate end-of-file */
	if (yyc == '@') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(0, "@", "end-of-data");
	}

	/* single chars */

	/* start of compound, subgraph, graph in grammar */
	if (yyc == '{') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_BO, "DOT_BO", "{");
	}

	/* end of compound, subgraph, graph in grammar */
	if (yyc == '}') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_BC, "DOT_BC", "}");
	}

	/* start of options in grammar */
	if (yyc == '[') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_BRACKETO, "DOT_BRACKETO", "[");
	}

	/* end of options in grammar */
	if (yyc == ']') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_BRACKETC, "DOT_BRACKETC", "]");
	}

	/* optional separator in options list in grammar */
	if (yyc == ',') {
		/* optional comma in grammar */
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_COMMA, "DOT_COMMA", ",");
	}

	/* optional semi-comma at end-of-statement in grammar */
	/* dot accepts also ';' in node statement option as in foo[bar=baz; color=green]; */
	if (yyc == ';') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_SEMIC, "DOT_SEMIC", ";");
	}

	/* used at node ports, compass point in grammar */
	if (yyc == ':') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_COLON, "DOT_COLON", ":");
	}

	/* attribute assignment in grammar */
	if (yyc == '=') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_IS, "DOT_IS", "=");
	}

	/* '+' as in "string"+"string" */
	if (yyc == '+') {
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_PLUS, "DOT_PLUS", "+");
	}

	/* '<' is seen as start of html format string for label */
	if (yyc == '<') {
		nest = 1;	/* '<' nesting level */
		i = 0;
		memset(strbuf, 0, DOT_MAXSTRING);
		/* html string to pass on has also last and final <> */
		strbuf[i] = (char)yyc;
		i = (i + 1);
		for (;;) {
			/* toedoe: to rewrite to get it better */
			yyc = wrgetc();
			if (yyc == EOF) {
				break;
			}
			if (yyc == 0x00) {
				break;
			}
			if (yyc == '<') {
				nest = (nest + 1);
			}
			if (yyc == '>') {
				if (nest > 0) {
					nest = (nest - 1);
				}
			}
			/* '\n' is copied into result */
			/* also copy last '>' in buffer */
			strbuf[i] = (char)yyc;
			i = (i + 1);
			if (i >= (DOT_MAXSTRING - 1)) {
				fprintf(stderr, "%s(): html string too long and truncated to %d chars\n", __FUNCTION__,
					(DOT_MAXSTRING - 1));
				fflush(stderr);
				break;
			}
			if (nest == 0) {
				break;
			}
		}
		/* if buffer is full and still inside html-label read rest of it until the last '>' */
		if (nest) {
			for (;;) {
				yyc = wrgetc();
				if (yyc == EOF) {
					break;
				}
				if (yyc == 0x00) {
					break;
				}
				if (yyc == '<') {
					nest = (nest + 1);
				}
				if (yyc == '>') {
					if (nest > 0) {
						nest = (nest - 1);
					}
				}
				if (nest == 0) {
					break;
				}
			}
		}
		if (strlen(strbuf) == 0) {
			lasthtml = (char *)"";
		} else {
			lasthtml = uniqstring(strbuf);
		}
		/* parse will parse the contents of the html label */
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_HTML, "DOT_HTML", lasthtml);
	}

	/* '"' is start of "" string for label */
	if (yyc == '"') {
		nest = 0;
		i = 0;
		memset(strbuf, 0, DOT_MAXSTRING);
		/* do not put the first and last '"' in result string */
		for (;;) {
			yyc = wrgetc();
			nest = (nest + 1);
			/* end of file */
			if (yyc == EOF) {
				yyc = wrungetc(yyc);
				break;
			}
			/* end of file */
			if (yyc == 0x00) {
				yyc = wrungetc(yyc);
				break;
			}
			/* filter some esc chars */
			if (yyc == '\\') {
				yyc = wrgetc();
				nest = (nest + 1);
				/* end of file */
				if (yyc == EOF) {
					/* avoid the last char being a '\' */
					yyc = wrungetc(yyc);
					yyc = '\\';
					break;
				}
				/* end of file */
				if (yyc == 0x00) {
					/* avoid the last char being a '\' */
					yyc = wrungetc(yyc);
					yyc = '\\';
					break;
				}
				if (yyc == '\n') {
					/* skip '\' '\n' as if it is white space */
					continue;
				}
				if (yyc == '"') {
					/* '\' '"' is changed into a single '"' */
					if (i < (DOT_MAXSTRING - 1)) {
						strbuf[i] = (char)yyc;
						i = (i + 1);
					}
					continue;
				}
				if (yyc == '\\') {
					/* copy '\' '\' as such */
					if ((i + 1) < (DOT_MAXSTRING - 1)) {
						strbuf[i] = (char)'\\';
						i = (i + 1);
						strbuf[i] = (char)yyc;
						i = (i + 1);
					}
					continue;
				}
				/* just copy any '\' char */
				if ((i + 1) < (DOT_MAXSTRING - 1)) {
					strbuf[i] = (char)'\\';
					i = (i + 1);
					strbuf[i] = (char)yyc;
					i = (i + 1);
				}
			} else {
				/* just copy anychar */
				if (yyc == '"') {
					/* stop at last '"' and do not put it in result string */
					break;
				}
				if (i < (DOT_MAXSTRING - 2)) {
					strbuf[i] = (char)yyc;
					i = (i + 1);
				}
			}
		}
		if (nest >= (DOT_MAXSTRING - 2)) {
			fprintf(stderr, "%s(): at line %d file %s string with %d chars is too long and truncated to %d chars\n",
				__FUNCTION__, yylineno, fname, nest, i);
			fflush(stderr);
		}
		/* if string is too long then it will become multiple tokens */
		/* at a "" input string */
		if (strlen(strbuf) == 0) {
			laststring = uniqstring("");
		} else {
			strbuf[i] = (char)0;
			i = (i + 1);
			laststring = uniqstring(strbuf);
		}
		/* laststring has backslash '\' chars to handle in parser */
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_STRING, "DOT_STRING", laststring);
	}

	/* '-' as part of edgetype, number or single char */
	if (yyc == '-') {
		/* number can also be id in the grammar depending on context */
		i = 0;
		memset(strbuf, 0, DOT_MAXSTRING);
		/* tmp save in strbuf */
		strbuf[i] = (char)yyc;
		i = (i + 1);
		yyc = wrgetc();
		/* end of file */
		if (yyc == EOF) {
			yyc = wrungetc(yyc);
			charbuf[0] = (char)strbuf[0];
			charbuf[1] = 0x00;
			lastchar = uniqstring(charbuf);
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
		}
		/* end of file */
		if (yyc == 0x00) {
			yyc = wrungetc(yyc);
			charbuf[0] = (char)strbuf[0];
			charbuf[1] = 0x00;
			lastchar = uniqstring(charbuf);
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
		}
		/* check for directed edge */
		if (yyc == '>') {
			/* seen '-' '>' */
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_DEDGE, "DOT_DEDGE", "->");
		}
		/* check for undirected edge */
		if (yyc == '-') {
			/* seen '-' '-' */
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_UEDGE, "DOT_UEDGE", "--");
		}
		/* check for number starting with '-' then '.' num or '-' num */
		if ((yyc == '.') || ((yyc >= '0') && (yyc <= '9'))) {
			strbuf[i] = (char)yyc;
			i = (i + 1);
			for (;;) {
				yyc = wrgetc();
				/* end of file */
				if (yyc == EOF) {
					yyc = wrungetc(yyc);
					break;
				}
				/* end of file */
				if (yyc == 0x00) {
					yyc = wrungetc(yyc);
					break;
				}
				if (strbuf[1] == '.') {
					/* at '-.' only numbers to follow */
					if ((yyc == '.') || ((yyc >= '0') && (yyc <= '9'))) {
						strbuf[i] = (char)yyc;
						i = (i + 1);
						if (i >= (DOT_MAXSTRING - 1)) {
							fprintf(stderr, "%s(): number string too long and truncated to %d chars\n",
								__FUNCTION__, (DOT_MAXSTRING - 1));
							fflush(stderr);
							break;
						}
					} else {
						yyc = wrungetc(yyc);
						break;
					}
				} else {
					/* -0.123 is oke. */
					if (((yyc >= '0') && (yyc <= '9')) || (yyc == '.')) {
						strbuf[i] = (char)yyc;
						i = (i + 1);
						if (i >= (DOT_MAXSTRING - 1)) {
							fprintf(stderr, "%s(): number string too long and truncated to %d chars\n",
								__FUNCTION__, (DOT_MAXSTRING - 1));
							fflush(stderr);
							break;
						}
					} else {
						yyc = wrungetc(yyc);
						break;
					}
				}
			}
			/* now optional chars can follow */
			if (i < (DOT_MAXSTRING - 1)) {
				for (;;) {
					yyc = wrgetc();
					/* end of file */
					if (yyc == EOF) {
						yyc = wrungetc(yyc);
						break;
					}
					/* end of file */
					if (yyc == 0x00) {
						yyc = wrungetc(yyc);
						break;
					}
					if ((yyc == '_') || ((yyc >= 'A') && (yyc <= 'Z')) || ((yyc >= 'a') && (yyc <= 'z'))
					    || ((yyc >= 0x80) && (yyc <= 0xff))) {
						strbuf[i] = (char)yyc;
						i = (i + 1);
						if (i >= (DOT_MAXSTRING - 1)) {
							fprintf(stderr, "%s(): number string too long and truncated to %d chars\n",
								__FUNCTION__, (DOT_MAXSTRING - 1));
							fflush(stderr);
							break;
						}
					} else {
						yyc = wrungetc(yyc);
						break;
					}
				}
			}
			/* if number is too long there can be chars left
			 * which will become multiple tokens.
			 */
			/* number can be string with multiple dots and letters examples
			 * "-.", "-...", "-...foo", "-.8", "-1.2" to parse further in parser
			 */
			/* check if only a "-." */
			if (strlen(strbuf) == 2) {
				if (strbuf[1] == '.') {
					/* change "-." into "0" */
					strbuf[0] = '0';
					strbuf[1] = 0x00;
				}
			}
			lastnum = uniqstring(strbuf);
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_NUM, "DOT_NUM", lastnum);
		}
		/* it has been stand-alone '-' followed by * to return as char */
		yyc = wrungetc(yyc);
		charbuf[0] = (char)strbuf[0];
		charbuf[1] = 0x00;
		lastchar = uniqstring(charbuf);
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
	}

	/* positive number start with '.' or '0' ... '9' */
	if ((yyc == '.') || ((yyc >= '0') && (yyc <= '9'))) {
		/* number can also be id in the grammar depending on context */
		i = 0;
		memset(strbuf, 0, DOT_MAXSTRING);
		/* tmp save in strbuf */
		strbuf[i] = (char)yyc;
		i = (i + 1);
		/* */
		for (;;) {
			yyc = wrgetc();
			/* end of file */
			if (yyc == EOF) {
				yyc = wrungetc(yyc);
				break;
			}
			/* end of file */
			if (yyc == 0x00) {
				yyc = wrungetc(yyc);
				break;
			}
			if (strbuf[0] == '.') {
				/* at '.' only numbers to follow */
				if ((yyc == '.') || ((yyc >= '0') && (yyc <= '9'))) {
					strbuf[i] = (char)yyc;
					i = (i + 1);
					if (i >= (DOT_MAXSTRING - 1)) {
						fprintf(stderr, "%s(): number string too long and truncated to %d chars\n",
							__FUNCTION__, (DOT_MAXSTRING - 1));
						fflush(stderr);
						break;
					}
				} else {
					yyc = wrungetc(yyc);
					break;
				}
			} else {
				/* 0.123 is oke. */
				if (((yyc >= '0') && (yyc <= '9')) || (yyc == '.')) {
					strbuf[i] = (char)yyc;
					i = (i + 1);
					if (i >= (DOT_MAXSTRING - 1)) {
						fprintf(stderr, "%s(): number string too long and truncated to %d chars\n",
							__FUNCTION__, (DOT_MAXSTRING - 1));
						fflush(stderr);
						break;
					}
				} else {
					yyc = wrungetc(yyc);
					break;
				}
			}
		}
		/* now optional chars can follow */
		if (i < (DOT_MAXSTRING - 1)) {
			for (;;) {
				yyc = wrgetc();
				/* end of file */
				if (yyc == EOF) {
					yyc = wrungetc(yyc);
					break;
				}
				/* end of file */
				if (yyc == 0x00) {
					yyc = wrungetc(yyc);
					break;
				}
				if ((yyc == '_') || ((yyc >= 'A') && (yyc <= 'Z')) || ((yyc >= 'a') && (yyc <= 'z'))
				    || ((yyc >= 0x80) && (yyc <= 0xff))) {
					strbuf[i] = (char)yyc;
					i = (i + 1);
					if (i >= (DOT_MAXSTRING - 1)) {
						fprintf(stderr, "%s(): number string too long and truncated to %d chars\n",
							__FUNCTION__, (DOT_MAXSTRING - 1));
						fflush(stderr);
						break;
					}
				} else {
					yyc = wrungetc(yyc);
					break;
				}
			}
		}
		/* if number is too long there can be chars left
		 * which will become multiple tokens.
		 */
		/* number can be string with multiple dots and letters examples
		 * "-.", "-...", "-...foo", "-.8", "-1.2" to parse further in parser
		 */
		/* check if only a "." */
		if (strlen(strbuf) == 1) {
			if (strbuf[0] == '.') {
				charbuf[0] = (char)strbuf[0];
				charbuf[1] = 0x00;
				lastchar = uniqstring(charbuf);
				yyntokens = (yyntokens + 1);
				TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
			}
		}
		lastnum = uniqstring(strbuf);
		yyntokens = (yyntokens + 1);
		TOKEN_RETURN(DOT_NUM, "DOT_NUM", lastnum);
	}

	/* id must start with a letter or underscore or >=0x80 */
	if ((yyc == '_') || ((yyc >= 'A') && (yyc <= 'Z')) || ((yyc >= 'a') && (yyc <= 'z')) || ((yyc >= 0x80) && (yyc <= 0xff))) {
		i = 0;
		memset(strbuf, 0, DOT_MAXSTRING);
		/* set first currently in yyc as id char */
		strbuf[i] = (char)yyc;
		i = (i + 1);
		for (;;) {
			yyc = wrgetc();
			/* end of file */
			if (yyc == EOF) {
				break;
			}
			/* end of file */
			if (yyc == 0x00) {
				break;
			}
			/* id must be followed by begin chars type with optional numbers 0-9 */
			if ((yyc == '_') || ((yyc >= 'A') && (yyc <= 'Z')) || ((yyc >= 'a') && (yyc <= 'z'))
			    || ((yyc >= 0x80) && (yyc <= 0xff))
			    || ((yyc >= '0') && (yyc <= '9'))) {
				strbuf[i] = (char)yyc;
				i = (i + 1);
			} else {
				/* non id char */
				yyc = wrungetc(yyc);
				break;
			}
			if (i >= (DOT_MAXSTRING - 1)) {
				/* too long id string */
				fprintf(stderr, "%s(): id string too long and truncated to %d chars\n", __FUNCTION__,
					(DOT_MAXSTRING - 1));
				fflush(stderr);
				break;
			}
		}
		/* there can be still id chars if longer then DOT_MAXSTRING
		 * that will result this way in multiple id tokens for it.
		 */
		/* return single char as char */
		if (strlen(strbuf) == 1) {
			charbuf[0] = (char)strbuf[0];
			charbuf[1] = 0x00;
			lastchar = uniqstring(charbuf);
			yyntokens = (yyntokens + 1);
			TOKEN_RETURN(DOT_CHAR, "DOT_CHAR", lastchar);
		}
		lastid = uniqstring(strbuf);
		yyntokens = (yyntokens + 1);

		if (yy_flex_debug) {
			printf("%s(): strlen \"%s\" is %d\n", __FUNCTION__, lastid, (int)strlen(lastid));
		}

		/* check for few predefind keywords */
		if (strlen(lastid) == 4) {
			if (lastid[0] == 'N' || lastid[0] == 'n') {
				if (lastid[1] == 'O' || lastid[1] == 'o') {
					if (lastid[2] == 'D' || lastid[2] == 'd') {
						if (lastid[3] == 'E' || lastid[3] == 'e') {
							TOKEN_RETURN(DOT_NODE, "DOT_NODE", lastid);
						}
					}
				}
			} else {
				if (lastid[0] == 'E' || lastid[0] == 'e') {
					if (lastid[1] == 'D' || lastid[1] == 'd') {
						if (lastid[2] == 'G' || lastid[2] == 'g') {
							if (lastid[3] == 'E' || lastid[3] == 'e') {
								TOKEN_RETURN(DOT_EDGE, "DOT_EDGE", lastid);
							}
						}
					}
				}
			}
		}

		/* */
		if (strlen(lastid) == 6) {
			if (lastid[0] == 'S' || lastid[0] == 's') {
				if (lastid[1] == 'T' || lastid[1] == 't') {
					if (lastid[2] == 'R' || lastid[2] == 'r') {
						if (lastid[3] == 'I' || lastid[3] == 'i') {
							if (lastid[4] == 'C' || lastid[4] == 'c') {
								if (lastid[5] == 'T' || lastid[5] == 't') {
									TOKEN_RETURN(DOT_STRICT, "DOT_STRICT", lastid);
								}
							}
						}
					}
				}
			}
		}

		if (strlen(lastid) == 5) {
			if (lastid[0] == 'G' || lastid[0] == 'g') {
				if (lastid[1] == 'R' || lastid[1] == 'r') {
					if (lastid[2] == 'A' || lastid[2] == 'a') {
						if (lastid[3] == 'P' || lastid[3] == 'p') {
							if (lastid[4] == 'H' || lastid[4] == 'h') {
								TOKEN_RETURN(DOT_GRAPH, "DOT_GRAPH", lastid);
							}
						}
					}
				}
			}
		}

		/* */
		if (strlen(lastid) == 7) {
			if (lastid[0] == 'D' || lastid[0] == 'd') {
				if (lastid[1] == 'I' || lastid[1] == 'i') {
					if (lastid[2] == 'G' || lastid[2] == 'g') {
						if (lastid[3] == 'R' || lastid[3] == 'r') {
							if (lastid[4] == 'A' || lastid[4] == 'a') {
								if (lastid[5] == 'P' || lastid[5] == 'p') {
									if (lastid[6] == 'H' || lastid[6] == 'h') {
										TOKEN_RETURN(DOT_DIGRAPH, "DOT_DIGRAPH", lastid);
									}
								}
							}
						}
					}
				}
			}
		}

		/* */
		if (strlen(lastid) == 8) {
			if (lastid[0] == 'S' || lastid[0] == 's') {
				if (lastid[1] == 'U' || lastid[1] == 'u') {
					if (lastid[2] == 'B' || lastid[2] == 'b') {
						if (lastid[3] == 'G' || lastid[3] == 'g') {
							if (lastid[4] == 'R' || lastid[4] == 'r') {
								if (lastid[5] == 'A' || lastid[5] == 'a') {
									if (lastid[6] == 'P' || lastid[6] == 'p') {
										if (lastid[7] == 'H' || lastid[7] == 'h') {
											TOKEN_RETURN(DOT_SUBGRAPH, "DOT_SUBGRAPH",
												     lastid);
										}
									}
								}
							}
						}
					}
				}
			}
		}

		if (yy_flex_debug) {
			printf("%s(): id \"%s\"\n", __FUNCTION__, lastid);
		}

		/* assume it is a regular if here */
		TOKEN_RETURN(DOT_ID, "DOT_ID", lastid);
	}

	/* un-parsed char returned as char token */
	charbuf[0] = (char)yyc;
	charbuf[1] = 0x00;
	lastchar = uniqstring(charbuf);
	yyntokens = (yyntokens + 1);

	fprintf(stderr, "%s(): at line %d file %s unlexed token %d 0x%02x `%c'\n", __FUNCTION__, yylineno, fname, yyntokens,
		(char)yyc, (char)yyc);
	fflush(stderr);

	TOKEN_RETURN(DOT_CHAR, "DOT_CHAR-unlexed-shouldnothappen!", lastchar);
}

/* wrapped fgetc to read one char from yyin.
 * returns the char as fgetc() does, or EOF at end-of-file, at a
 * stream error (the error is cleared) or when there is no stream.
 * the line number is incremented at every '\n' read.
 * with debug level above 1 every char read is printed on stdout.
 */
static int wrgetc(void)
{
	int c = 0;
	if (yyin == (FILE *) 0) {
		/* some error because there is no stream */
		return (EOF);
	}
	/* get single char. reading 128Kb at once with fread() is fastest on Linux */
	c = fgetc(yyin);
	/* test stream error */
	if (ferror(yyin)) {
		clearerr(yyin);
		return (EOF);
	}
	/* track line number */
	if (c == '\n') {
		yylineno = (yylineno + 1);
	}
	if (yy_flex_debug > 1) {
		/* print the char as a byte value, a plain (char) can be negative with "%02x" */
		printf("%s(): char 0x%02x '%c' at line %d\n", __FUNCTION__, (unsigned int)(unsigned char)c, (char)c, yylineno);
		fflush(stdout);
	}
	return (c);
}

/* wrapped ungetc to push back char c on yyin.
 * returns c at success or EOF at error or when there is no stream.
 * pushing back EOF does nothing and returns EOF as ungetc() does.
 * the line number is decremented when a '\n' is really pushed back.
 * with debug level above 1 every char pushed back is printed on stdout.
 */
static int wrungetc(int c)
{
	int cc = 0;
	if (yy_flex_debug > 1) {
		printf("%s(): char 0x%02x `%c' at line %d\n", __FUNCTION__, (unsigned int)(unsigned char)c, (char)c, yylineno);
		fflush(stdout);
	}
	/* no stream to push back to, same check as in wrgetc() */
	if (yyin == (FILE *) 0) {
		return (EOF);
	}
	cc = ungetc(c, yyin);
	if (cc != c) {
		/* some error and the char is not back in the stream */
		fprintf(stderr, "%s(): got 0x%02x expected 0x%02x '%c'\n", __FUNCTION__, (unsigned int)(unsigned char)cc,
			(unsigned int)(unsigned char)c, (char)c);
		fflush(stderr);
		return (cc);
	}
	/* track line number, the '\n' will be read and counted again */
	if (c == '\n') {
		if (yylineno > 0) {
			yylineno = (yylineno - 1);
		}
	}
	return (cc);
}

/* skip whitespace and comments and return the first char after it.
 * returns EOF or 0x00 when the end of the data is reached, also when
 * that happens inside a comment.
 * a '\n' outside a comment is not skipped and is returned to the caller.
 * skip c style comment, it ends at the first '*' '/' also when
 * there are more '*' chars before the '/' as in "**" "/"
 * skip c++ style comment
 * skip #line cpp statements
 * skip shell comments #.*
 * a '/' which does not start a comment is returned as '/' and the
 * char after it is pushed back.
 * whitespace chars are:
 * \t (tab)
 * \r (carriage return)
 * ' ' (space)
 * wrgetc() keeps track of yylineno
 */
static int dotlex_skipws(void)
{
	int c = 0;
	int prev = 0;		/* previous char inside c comment */
	int oneline = 0;	/* set when comment runs until end-of-line */
	for (;;) {
		c = wrgetc();
		/* eof or 0 eof */
		if ((c == EOF) || (c == '\0')) {
			break;
		}
		/* skip spaces, tabs and cr */
		if ((c == ' ') || (c == '\t') || (c == '\r')) {
			continue;
		}
		oneline = 0;
		if (c == '#') {
			/* shell comments and #line on 1 single line */
			oneline = 1;
		} else if (c == '/') {
			/* possible start c and c++ comment */
			c = wrgetc();
			if (c == '/') {
				/* start c++ comment with '/' '/' */
				oneline = 1;
			} else if (c == '*') {
				/* start c comment with '/' '*' */
				prev = 0;
				for (;;) {
					c = wrgetc();
					/* eof or 0 eof */
					if ((c == EOF) || (c == '\0')) {
						break;
					}
					/* end of c comment '*' '/' */
					if ((prev == '*') && (c == '/')) {
						break;
					}
					prev = c;
				}
				/* eof or 0 eof inside comment */
				if ((c == EOF) || (c == '\0')) {
					break;
				}
				continue;
			} else {
				/* plain '/' to return */
				c = wrungetc(c);
				c = '/';
				break;
			}
		} else {
			/* something non-ws */
			break;
		}
		if (oneline) {
			/* skip until end-of-line */
			do {
				c = wrgetc();
			} while ((c != EOF) && (c != '\0') && (c != '\n'));
			/* eof or 0 eof inside comment */
			if (c != '\n') {
				break;
			}
		}
	}
	/* return first non-ws or 0 or EOF */
	return (c);
}

/* end. */
